#!/usr/bin/awk -f
# Set-up: choose the file names used by the script and load both word lists
# into lookup tables before any input record is processed.
#   outputfile : frequency table written by the END block
#   endict     : lower-cased English word   -> same lower-cased word
#   isdict     : lower-cased Icelandic word -> same lower-cased word
BEGIN {
  outputfile = "wordLengthSampleFreq.txt"
  enfile = "/usr/share/dict/words"
  isfile = "islensku_ordalisti.txt"

  # getline yields 1 for each line read, 0 at end of file and -1 when the
  # file cannot be read; keep the final status so a failure can be reported.
  while ((rc = (getline line < enfile)) > 0) {
    word = tolower(line)
    endict[word] = word
  }
  if (rc < 0)
    print "warning: cannot read English word list: " enfile > "/dev/stderr"
  close(enfile)

  while ((rc = (getline line < isfile)) > 0) {
    word = tolower(line)
    isdict[word] = word
  }
  # Without this list no word can ever be counted as Icelandic, so say so
  # instead of silently producing a report full of "unknown" words.
  if (rc < 0)
    print "warning: cannot read Icelandic word list: " isfile > "/dev/stderr"
  close(isfile)
}
# Runs for every input record: strip non-word characters, then classify each
# remaining field as Icelandic, English or unknown and update the counters.
# The Icelandic dictionary is consulted first, so a word present in both
# lists is counted as Icelandic.
{
  # Keep only letters, underscores and blanks. Assigning to $0 through gsub
  # makes awk split the record into fields again.
  gsub(/[^[:alpha:]_[:blank:]]/, "", $0)
  for (i = 1; i <= NF; i++) {
    totalwords++
    word = tolower($i)
    wordlen = length($i)
    # Use "in" for membership: merely referencing isdict[word] would create
    # an empty element for every word that is not in the dictionary.
    if (word in isdict) {
      iscount++
      istotallength += wordlen
      if (wordlen > islongeststring)
        islongeststring = wordlen
      wlfreq[wordlen]++
    } else if (word in endict) {
      encount++
    } else {
      unknowncount++
    }
  }
}
# Final report: print the summary to standard output, write the word-length
# frequency table (length <TAB> percentage of Icelandic words) to outputfile
# and then run ./makePlots.
END {
  print "-- Report --"
  print "Inputfile: " ARGV[1]
  # Counters that were never incremented are still uninitialised; adding 0
  # makes them print as "0" instead of an empty string.
  print "Total number of words: " (totalwords + 0)
  print "Total number of icelandic words: " (iscount + 0)
  # A literal percent sign in a printf format must be written as %%.
  printf("Percentage of icelandic words: %.2f%%\n", ratio(iscount * 100, totalwords))
  # islongeststring holds the length of the longest Icelandic word seen,
  # not the word itself.
  print "Longest icelandic word: " (islongeststring + 0)
  printf("Average icelandic word length: %.2f\n", ratio(istotallength, iscount))
  print "Total number of english words: " (encount + 0)
  printf("Percentage of english words: %.2f%%\n", ratio(encount * 100, totalwords))
  print "Total number of unknown words: " (unknowncount + 0)
  printf("Percentage of unknown words: %.2f%%\n", ratio(unknowncount * 100, totalwords))

  # Only lengths 2..28 are written; presumably this is the range ./makePlots
  # expects -- TODO confirm against that script.
  for (i = 2; i <= 28; i++) {
    printf("%s\t%.4f\n", i, ratio(wlfreq[i] * 100, iscount)) > outputfile
  }
  # Close the table so that it is completely written to disk before the
  # external program is started.
  close(outputfile)
  system("./makePlots")
}

# Return num / den, or 0 when den is zero (e.g. empty input or no Icelandic
# words), so the report never aborts with a division-by-zero error.
function ratio(num, den) {
  return (den == 0) ? 0 : num / den
}
